﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;

namespace ASPNETX
{
    /// <summary>
    /// Small collection of static helpers for working with HTML text:
    /// stripping tags, extracting the document title and downloading a page.
    /// </summary>
    public class HTMLHelper
    {
        // Matches any markup tag: '<', then everything up to the next '>'.
        private static readonly Regex TagPattern = new Regex("<[^>]*>", RegexOptions.Compiled);

        // Matches the first <title> element. Case-insensitive, tolerates attributes on the
        // opening tag, and uses a lazy capture in single-line mode so that a title spanning
        // several lines is found and the capture stops at the FIRST closing tag.
        private static readonly Regex TitlePattern = new Regex(
            "<title[^>]*>(.*?)</title>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Value returned by GetTitle when the document has no <title> element.
        private const string UnknownTitle = "(unknown)";

        // Encoding name used by the single-argument GetURLContent overload.
        private const string DefaultEncodingName = "GB2312";

        /// <summary>
        /// Removes HTML tags from the given text and replaces every
        /// <c>&amp;nbsp;</c> entity with a plain space.
        /// </summary>
        /// <param name="html">The HTML text to clean. Must not be null.</param>
        /// <returns>The text with all tags removed. Entities other than
        /// <c>&amp;nbsp;</c> are left untouched.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
        public static string ParseHtml(string html)
        {
            string withoutTags = TagPattern.Replace(html, "");
            return withoutTags.Replace("&nbsp;", " ");
        }

        /// <summary>
        /// Gets the content of the first <c>&lt;title&gt;</c> element in the HTML text.
        /// </summary>
        /// <param name="html">The HTML text to search. Must not be null.</param>
        /// <returns>The raw text between the title tags (not trimmed, may be empty),
        /// or <c>"(unknown)"</c> when the text contains no title element.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
        public static string GetTitle(string html)
        {
            Match m = TitlePattern.Match(html);

            // Test Success rather than Groups.Count: the group count is fixed by the
            // pattern and is the same for a failed match, so it cannot detect "no title".
            if (m.Success)
            {
                return m.Groups[1].Value;
            }

            return UnknownTitle;
        }

        /// <summary>
        /// Downloads the resource at the given URL and returns its body decoded as GB2312 text.
        /// </summary>
        /// <param name="url">The address of the resource to download.</param>
        /// <returns>The complete response body as a string.</returns>
        public static string GetURLContent(string url)
        {
            return GetURLContent(url, Encoding.GetEncoding(DefaultEncodingName));
        }

        /// <summary>
        /// Downloads the resource at the given URL and returns its body decoded
        /// with the supplied encoding.
        /// </summary>
        /// <param name="url">The address of the resource to download.</param>
        /// <param name="encoding">The encoding used to decode the response bytes.</param>
        /// <returns>The complete response body as a string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
        public static string GetURLContent(string url, Encoding encoding)
        {
            if (encoding == null)
            {
                throw new ArgumentNullException("encoding");
            }

            WebRequest request = WebRequest.Create(url);

            // Dispose the response as well as the stream and reader, so the underlying
            // connection is released even when reading throws.
            using (WebResponse response = request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, encoding))
            {
                return reader.ReadToEnd();
            }
        }
    }
}
